In [914]:
!pip install arch
Requirement already satisfied: arch in /usr/local/lib/python3.7/dist-packages (5.1.0)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from arch) (1.19.5)
Requirement already satisfied: property-cached>=1.6.4 in /usr/local/lib/python3.7/dist-packages (from arch) (1.6.4)
Requirement already satisfied: statsmodels>=0.11 in /usr/local/lib/python3.7/dist-packages (from arch) (0.13.1)
Requirement already satisfied: scipy>=1.3 in /usr/local/lib/python3.7/dist-packages (from arch) (1.4.1)
Requirement already satisfied: pandas>=1.0 in /usr/local/lib/python3.7/dist-packages (from arch) (1.1.5)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0->arch) (2.8.2)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0->arch) (2018.9)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.0->arch) (1.15.0)
Requirement already satisfied: patsy>=0.5.2 in /usr/local/lib/python3.7/dist-packages (from statsmodels>=0.11->arch) (0.5.2)
In [915]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date
from scipy import stats

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timedelta
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import ipywidgets as widgets
from IPython.display import display
In [916]:
# DROPDOWN USED TO CHOOSE WHICH TICKER'S DATASET THE NOTEBOOK LOADS;
# 'SELECT' IS THE PLACEHOLDER SHOWN UNTIL A TICKER IS PICKED
w = widgets.Dropdown(
    options=[
        'SELECT',
        'AAPL', 'ABUS', 'ARDS', 'BABA', 'BFRI',
        'FB', 'GME', 'MCD', 'PFE', 'PLUG',
        'QCOM', 'SENS', 'TSLA', 'TWTR', 'UUUU',
    ],
    value='SELECT',
    description='Stock name:',
)

def on_change(change):
    """Print the newly selected option when the observed widget's value changes.

    `change` is the notification dict passed by the widget; only notifications
    with type 'change' for the trait named 'value' produce output.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return
    print("You have selected %s" % change['new'])

# REGISTER on_change AS THE CALLBACK FOR NOTIFICATIONS COMING FROM THE DROPDOWN
w.observe(on_change)

# RENDER THE DROPDOWN IN THE CELL OUTPUT
display(w)
You have selected AAPL
In [929]:
# LOAD THE DATASET OF THE TICKER CHOSEN IN THE DROPDOWN.
# EVERY FILE FOLLOWS THE SAME '/content/Final_<TICKER>.csv' NAMING SCHEME, SO THE
# PATH IS BUILT FROM THE TICKER INSTEAD OF REPEATING ONE `if` PER STOCK.
if w.value == 'SELECT':
  # FAIL LOUDLY: OTHERWISE df WOULD BE UNDEFINED (OR LEFT OVER FROM A PREVIOUSLY
  # SELECTED TICKER) AND THE FOLLOWING CELLS WOULD ANALYSE THE WRONG DATA
  raise ValueError("No stock selected - pick a ticker in the dropdown before running this cell.")

df = pd.read_csv('/content/Final_%s.csv' % w.value)
In [930]:
pd.set_option('display.max_colwidth', None)
In [931]:
# PARSE THE Date COLUMN INTO PANDAS DATETIMES (datetime64[ns])
df['Date'] = pd.to_datetime(df['Date'])
In [932]:
# REMOVE THE 'Unnamed: 0' COLUMN (PRESUMABLY AN INDEX SAVED BY AN EARLIER to_csv - TODO CONFIRM).
# errors='ignore' MAKES THE CELL SAFE TO RE-RUN: `del` RAISED KeyError ONCE THE COLUMN WAS GONE.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
In [933]:
df.head(5)
Out[933]:
Date Open High Low Close Adj Close Volume Return Beta Variance AvgTrueRange Upperband Lowerband Middleband APO NATR TRANGE DMI MACD MACDSIGNAL MACDHIST MOM PPO ROCP RSI TRIX ULTOSC SLOWK SLOWD AD ADOSC OBV Upward_momentum_created Downward_momentum_created B5_O_Um B5_C_Um B5_E_Um B5_A_Um B5_N_Um B5_O_Dm B5_C_Dm B5_E_Dm B5_A_Dm B5_N_Dm Verified_status_True Verified_status_False O C E A N Real_or_Fake_tweet
0 2020-03-23 57.020000 57.125000 53.152500 56.092499 55.402668 336752800 -2.124416 0.629900 16.357354 5.010935 69.449560 53.271869 61.360715 NaN 8.933342 4.157501 65.765014 NaN NaN NaN -10.450005 NaN -0.157043 35.353916 NaN NaN 16.006750 18.466645 -2.580409e+08 -1.119659e+08 -2.343889e+09 0.0 5.021563e+06 0.0 0.0 0.0 0.0 0.0 5.021563e+06 0.0 5.021563e+06 0.0 5.021563e+06 6 367 373 0 373 0 373 373
1 2020-03-24 59.090000 61.922501 58.575001 61.720001 60.960964 287531200 10.032540 0.893522 5.696339 5.127944 65.023758 55.476957 60.250357 NaN 8.308399 5.830002 23.115086 NaN NaN NaN -9.614998 NaN -0.134787 42.673833 NaN NaN 38.436086 23.058723 -5.296803e+06 -1.673491e+07 -2.056358e+09 0.0 1.436755e+07 0.0 0.0 0.0 0.0 0.0 1.436755e+07 0.0 1.436755e+07 0.0 1.436755e+07 27 988 1015 0 1015 0 1015 1015
2 2020-03-25 62.687500 64.562500 61.075001 61.380001 60.625141 303602000 -0.550875 0.936120 5.851622 4.893595 65.206597 55.530546 60.368572 NaN 7.972621 3.487499 5.661738 NaN NaN NaN -7.477497 NaN -0.108594 42.361738 NaN NaN 61.142115 38.528317 -2.557956e+08 -5.595957e+07 -2.359960e+09 0.0 2.020403e+07 0.0 0.0 0.0 0.0 0.0 2.020403e+07 0.0 2.020403e+07 0.0 2.020403e+07 43 1046 1089 0 1089 0 1089 1089
3 2020-03-26 61.630001 64.669998 61.590000 64.610001 63.815426 252087200 5.262300 0.934195 7.224416 4.664509 65.943512 55.192203 60.567857 NaN 7.219485 3.289997 4.954909 NaN NaN NaN 2.552502 NaN 0.041131 46.374158 NaN NaN 85.590696 61.722966 -1.352958e+07 1.016586e+07 -2.107872e+09 0.0 1.360886e+07 0.0 0.0 0.0 0.0 0.0 1.360886e+07 0.0 1.360886e+07 0.0 1.360886e+07 19 956 975 0 975 0 975 975
4 2020-03-27 63.187500 63.967499 61.762501 61.935001 61.173317 204216800 -4.140225 0.961334 7.317223 4.404937 66.016145 55.195999 60.606072 NaN 7.112193 2.847500 4.954909 NaN NaN NaN -7.557499 NaN -0.108753 43.663234 NaN NaN 82.613478 76.448763 -1.857939e+08 -1.851839e+07 -2.312089e+09 0.0 8.587940e+06 0.0 0.0 0.0 0.0 0.0 8.587940e+06 0.0 8.587940e+06 0.0 8.587940e+06 13 769 782 0 782 0 782 782
In [934]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 441 entries, 0 to 440
Data columns (total 52 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Date                       441 non-null    datetime64[ns]
 1   Open                       441 non-null    float64       
 2   High                       441 non-null    float64       
 3   Low                        441 non-null    float64       
 4   Close                      441 non-null    float64       
 5   Adj Close                  441 non-null    float64       
 6   Volume                     441 non-null    int64         
 7   Return                     441 non-null    float64       
 8   Beta                       441 non-null    float64       
 9   Variance                   441 non-null    float64       
 10  AvgTrueRange               441 non-null    float64       
 11  Upperband                  441 non-null    float64       
 12  Lowerband                  441 non-null    float64       
 13  Middleband                 441 non-null    float64       
 14  APO                        436 non-null    float64       
 15  NATR                       441 non-null    float64       
 16  TRANGE                     441 non-null    float64       
 17  DMI                        441 non-null    float64       
 18  MACD                       428 non-null    float64       
 19  MACDSIGNAL                 428 non-null    float64       
 20  MACDHIST                   428 non-null    float64       
 21  MOM                        441 non-null    float64       
 22  PPO                        436 non-null    float64       
 23  ROCP                       441 non-null    float64       
 24  RSI                        441 non-null    float64       
 25  TRIX                       373 non-null    float64       
 26  ULTOSC                     433 non-null    float64       
 27  SLOWK                      441 non-null    float64       
 28  SLOWD                      441 non-null    float64       
 29  AD                         441 non-null    float64       
 30  ADOSC                      441 non-null    float64       
 31  OBV                        441 non-null    float64       
 32  Upward_momentum_created    441 non-null    float64       
 33  Downward_momentum_created  441 non-null    float64       
 34  B5_O_Um                    441 non-null    float64       
 35  B5_C_Um                    441 non-null    float64       
 36  B5_E_Um                    441 non-null    float64       
 37  B5_A_Um                    441 non-null    float64       
 38  B5_N_Um                    441 non-null    float64       
 39  B5_O_Dm                    441 non-null    float64       
 40  B5_C_Dm                    441 non-null    float64       
 41  B5_E_Dm                    441 non-null    float64       
 42  B5_A_Dm                    441 non-null    float64       
 43  B5_N_Dm                    441 non-null    float64       
 44  Verified_status_True       441 non-null    int64         
 45  Verified_status_False      441 non-null    int64         
 46  O                          441 non-null    int64         
 47  C                          441 non-null    int64         
 48  E                          441 non-null    int64         
 49  A                          441 non-null    int64         
 50  N                          441 non-null    int64         
 51  Real_or_Fake_tweet         441 non-null    int64         
dtypes: datetime64[ns](1), float64(42), int64(9)
memory usage: 179.3 KB
In [935]:
df.shape
Out[935]:
(441, 52)
In [936]:
sns.set(font_scale=0.8)
In [937]:
# USE SEABORN'S "talk" CONTEXT WITH ENLARGED FONTS
sns.set_context("talk", font_scale=1.3)

# PLOT THE LOADED STOCK'S CLOSING PRICE AGAINST ITS DATE
with sns.axes_style("darkgrid"):
    fig, ax = plt.subplots(figsize=(18,8))
    # DRAW ON THE AXES CREATED ABOVE EXPLICITLY INSTEAD OF RELYING ON THE CURRENT-AXES STATE
    sns.lineplot(x=df.Date, y=df.Close, color='blue', ax=ax)
    ax.set_title('Closing Price')
In [938]:
# CALCULATE PRICE RETURNS AS DAILY PERCENTAGE CHANGE (IN %) USING pct_change().
# THE FIRST ROW HAS NO PREVIOUS CLOSE AND STAYS NaN; A .dropna() HERE WOULD BE A NO-OP
# BECAUSE ASSIGNING BACK TO THE COLUMN RE-ALIGNS ON THE INDEX AND RESTORES THAT NaN.
df['returns'] = 100 * df['Close'].pct_change()
In [939]:
# LOG RETURN OF EACH ROW: log(CLOSE / PREVIOUS ROW'S CLOSE); THE FIRST ROW IS NaN
df['log_returns'] = np.log(df['Close'] / df['Close'].shift(1))
In [940]:
df.head()
Out[940]:
Date Open High Low Close Adj Close Volume Return Beta Variance AvgTrueRange Upperband Lowerband Middleband APO NATR TRANGE DMI MACD MACDSIGNAL MACDHIST MOM PPO ROCP RSI TRIX ULTOSC SLOWK SLOWD AD ADOSC OBV Upward_momentum_created Downward_momentum_created B5_O_Um B5_C_Um B5_E_Um B5_A_Um B5_N_Um B5_O_Dm B5_C_Dm B5_E_Dm B5_A_Dm B5_N_Dm Verified_status_True Verified_status_False O C E A N Real_or_Fake_tweet returns log_returns
0 2020-03-23 57.020000 57.125000 53.152500 56.092499 55.402668 336752800 -2.124416 0.629900 16.357354 5.010935 69.449560 53.271869 61.360715 NaN 8.933342 4.157501 65.765014 NaN NaN NaN -10.450005 NaN -0.157043 35.353916 NaN NaN 16.006750 18.466645 -2.580409e+08 -1.119659e+08 -2.343889e+09 0.0 5.021563e+06 0.0 0.0 0.0 0.0 0.0 5.021563e+06 0.0 5.021563e+06 0.0 5.021563e+06 6 367 373 0 373 0 373 373 NaN NaN
1 2020-03-24 59.090000 61.922501 58.575001 61.720001 60.960964 287531200 10.032540 0.893522 5.696339 5.127944 65.023758 55.476957 60.250357 NaN 8.308399 5.830002 23.115086 NaN NaN NaN -9.614998 NaN -0.134787 42.673833 NaN NaN 38.436086 23.058723 -5.296803e+06 -1.673491e+07 -2.056358e+09 0.0 1.436755e+07 0.0 0.0 0.0 0.0 0.0 1.436755e+07 0.0 1.436755e+07 0.0 1.436755e+07 27 988 1015 0 1015 0 1015 1015 10.032540 0.095606
2 2020-03-25 62.687500 64.562500 61.075001 61.380001 60.625141 303602000 -0.550875 0.936120 5.851622 4.893595 65.206597 55.530546 60.368572 NaN 7.972621 3.487499 5.661738 NaN NaN NaN -7.477497 NaN -0.108594 42.361738 NaN NaN 61.142115 38.528317 -2.557956e+08 -5.595957e+07 -2.359960e+09 0.0 2.020403e+07 0.0 0.0 0.0 0.0 0.0 2.020403e+07 0.0 2.020403e+07 0.0 2.020403e+07 43 1046 1089 0 1089 0 1089 1089 -0.550875 -0.005524
3 2020-03-26 61.630001 64.669998 61.590000 64.610001 63.815426 252087200 5.262300 0.934195 7.224416 4.664509 65.943512 55.192203 60.567857 NaN 7.219485 3.289997 4.954909 NaN NaN NaN 2.552502 NaN 0.041131 46.374158 NaN NaN 85.590696 61.722966 -1.352958e+07 1.016586e+07 -2.107872e+09 0.0 1.360886e+07 0.0 0.0 0.0 0.0 0.0 1.360886e+07 0.0 1.360886e+07 0.0 1.360886e+07 19 956 975 0 975 0 975 975 5.262300 0.051285
4 2020-03-27 63.187500 63.967499 61.762501 61.935001 61.173317 204216800 -4.140225 0.961334 7.317223 4.404937 66.016145 55.195999 60.606072 NaN 7.112193 2.847500 4.954909 NaN NaN NaN -7.557499 NaN -0.108753 43.663234 NaN NaN 82.613478 76.448763 -1.857939e+08 -1.851839e+07 -2.312089e+09 0.0 8.587940e+06 0.0 0.0 0.0 0.0 0.0 8.587940e+06 0.0 8.587940e+06 0.0 8.587940e+06 13 769 782 0 782 0 782 782 -4.140225 -0.042284
In [941]:
# DROP EVERY ROW THAT HAS A MISSING VALUE IN ANY COLUMN.
# THIS REMOVES THE 1ST ROW (NO PREVIOUS CLOSE FOR RETURNS/LOG RETURNS) AND ALSO THE
# EARLY ROWS WHERE INDICATOR COLUMNS SUCH AS TRIX ARE STILL NaN.
# NOTE(review): IF ONLY THE 1ST ROW SHOULD GO, USE dropna(subset=['returns', 'log_returns']) - CONFIRM INTENT
df = df.dropna()
In [942]:
# ONE FIGURE ROW PER SERIES: THE SERIES ITSELF ON THE LEFT, ITS HISTOGRAM WITH A
# FITTED NORMAL CURVE ON THE RIGHT, TO COMPARE RETURNS & LOG RETURNS VISUALLY
panels = [
    (df.returns, 'Returns', 'blue'),
    (df.log_returns, 'Log Returns', 'green'),
]

with sns.axes_style("darkgrid"):
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(18,12))

    for (line_ax, hist_ax), (series, title, colour) in zip(axes, panels):
        line_ax.plot(series, color=colour)
        line_ax.set_title(title)

        sns.distplot(series, norm_hist=True, fit=stats.norm, color=colour,
                     bins=50, ax=hist_ax)
        hist_ax.set_title(title)

    plt.tight_layout()
    fig.show();
In [943]:
# CREATE A FUNCTION THAT CALCULATES REALIZED VOLATILITY
# FROM DAILY LOG RETURNS
def realized_volatility_daily(series_log_return):
    """
    Realized volatility of one window of log returns.

    Computed as sqrt(sum(r_i ** 2) / (n - 1)), where n is the number of
    observations in the window. The mean return is not subtracted.

    Parameters
    ----------
    series_log_return : pandas.Series or numpy.ndarray
        Log returns of the window; must support ``len()`` and ``** 2``.

    Returns
    -------
    float
        The realized volatility, or NaN when the window holds fewer than two
        observations (the n - 1 divisor would otherwise be zero or negative).
    """
    n = len(series_log_return)
    if n < 2:
        # Avoid dividing by zero (n == 1) or by -1 (empty window).
        return np.nan
    return np.sqrt(np.sum(series_log_return**2)/(n - 1))
In [944]:
intervals = [7, 30, 60, 180, 365]

# ROLLING REALIZED VOLATILITY OF THE LOG RETURNS, ONE ARRAY PER WINDOW LENGTH
rolling_vols = {
    window: df.log_returns.rolling(window=window)
                          .apply(realized_volatility_daily).values
    for window in intervals
}

# ONE COLUMN PER WINDOW LENGTH, ALIGNED ON df'S INDEX
vols_df = pd.DataFrame(rolling_vols, columns=intervals, index=df.index)
In [945]:
# SWITCH TO THE fivethirtyeight MATPLOTLIB STYLE
plt.style.use(['fivethirtyeight'])

fig, ax = plt.subplots(figsize=(18,7))

# THE 7-DAY SERIES IS DRAWN THINNER AND SEMI-TRANSPARENT; ALL OTHER WINDOWS SOLID
for i in intervals:
    alpha, lw = (0.5, 1) if i == 7 else (1.0, 2)
    ax.plot(vols_df[i], label=f'{i}-Day Interval Realized Volatility',
            alpha=alpha, lw=lw)

ax.set_title('Realized Volatility Using Different Interval Windows', fontsize=21)

plt.legend(loc='best', prop={'size': 14})
plt.show();
In [946]:
INTERVAL_WINDOW = 30
n_future = 7

log_ret = df['log_returns']

# BACKWARD LOOKING: REALIZED VOLATILITY OVER THE LAST INTERVAL_WINDOW ROWS
df['vol_current'] = (
    log_ret.rolling(window=INTERVAL_WINDOW)
           .apply(realized_volatility_daily)
)

# FORWARD LOOKING: SAME ROLLING STATISTIC ON THE SERIES SHIFTED BACK BY n_future ROWS,
# SO EACH ROW'S WINDOW ENDS n_future ROWS AHEAD OF IT
df['vol_future'] = (
    log_ret.shift(-n_future)
           .rolling(window=INTERVAL_WINDOW)
           .apply(realized_volatility_daily)
)
In [947]:
df.describe()
Out[947]:
Open High Low Close Adj Close Volume Return Beta Variance AvgTrueRange Upperband Lowerband Middleband APO NATR TRANGE DMI MACD MACDSIGNAL MACDHIST MOM PPO ROCP RSI TRIX ULTOSC SLOWK SLOWD AD ADOSC OBV Upward_momentum_created Downward_momentum_created B5_O_Um B5_C_Um B5_E_Um B5_A_Um B5_N_Um B5_O_Dm B5_C_Dm B5_E_Dm B5_A_Dm B5_N_Dm Verified_status_True Verified_status_False O C E A N Real_or_Fake_tweet returns log_returns vol_current vol_future
count 373.000000 373.000000 373.000000 373.000000 373.000000 3.730000e+02 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 3.730000e+02 3.730000e+02 3.730000e+02 373.0 3.730000e+02 373.0 373.0 373.0 373.0 373.0 3.730000e+02 373.0 3.730000e+02 373.0 3.730000e+02 373.000000 373.000000 373.000000 373.0 373.000000 373.0 373.000000 373.000000 373.000000 373.000000 344.000000 337.000000
mean 130.791998 132.289451 129.274222 130.838921 130.280209 1.091079e+08 0.196225 0.763967 6.495903 3.258898 134.624488 125.696830 130.160659 1.507587 2.541936 3.312500 36.309697 1.481322 1.454243 0.027079 2.279759 1.251722 0.019799 56.713070 0.193735 53.693181 56.931604 56.983372 1.668262e+09 1.222484e+07 1.163158e+09 0.0 2.962218e+07 0.0 0.0 0.0 0.0 0.0 2.962218e+07 0.0 2.962218e+07 0.0 2.962218e+07 39.337802 1879.943700 1919.281501 0.0 1919.281501 0.0 1919.281501 1919.281501 0.196225 0.001771 0.018705 0.018546
std 17.309654 17.431134 17.291613 17.376804 17.625371 4.952592e+07 1.953292 0.399802 8.384719 0.976497 17.292387 17.307655 17.123570 3.439116 0.874476 1.787296 22.032415 2.282406 2.071192 0.779085 7.087490 2.797648 0.057755 11.400372 0.194147 9.237370 24.247599 22.261619 2.990677e+08 6.505526e+07 5.533949e+08 0.0 3.420458e+07 0.0 0.0 0.0 0.0 0.0 3.420458e+07 0.0 3.420458e+07 0.0 3.420458e+07 31.336769 807.629326 832.903826 0.0 832.903826 0.0 832.903826 832.903826 1.953292 0.019495 0.006477 0.006448
min 88.312500 90.542503 87.820000 90.445000 89.574516 4.100000e+07 -8.006086 -0.483506 0.100885 1.862693 92.352753 85.935328 89.836785 -6.866250 1.356956 0.862495 0.140086 -3.539473 -2.963971 -2.510746 -22.049995 -5.812268 -0.164331 28.279096 -0.097785 30.909286 5.175548 11.363815 8.367979e+08 -1.871199e+08 -6.199268e+08 0.0 8.247484e+06 0.0 0.0 0.0 0.0 0.0 8.247484e+06 0.0 8.247484e+06 0.0 8.247484e+06 10.000000 797.000000 807.000000 0.0 807.000000 0.0 807.000000 807.000000 -8.006086 -0.083448 0.010033 0.010033
25% 119.900002 121.169998 118.150002 119.900002 119.343437 7.583400e+07 -0.777838 0.511713 1.881669 2.595455 123.673900 114.227710 119.188571 -0.445129 1.946684 2.029999 18.021302 -0.166436 -0.099720 -0.500311 -2.319992 -0.359148 -0.016674 48.301657 0.042848 47.230665 36.484828 38.489746 1.511316e+09 -3.231427e+07 9.090397e+08 0.0 1.396407e+07 0.0 0.0 0.0 0.0 0.0 1.396407e+07 0.0 1.396407e+07 0.0 1.396407e+07 23.000000 1385.000000 1408.000000 0.0 1408.000000 0.0 1408.000000 1408.000000 -0.777838 -0.007809 0.013195 0.013173
50% 128.960007 130.710007 127.860001 129.639999 129.107910 9.684900e+07 0.148503 0.755548 3.865648 3.008100 134.071972 124.362518 129.228572 1.767500 2.360681 2.900009 34.839791 1.554963 1.556372 0.117268 2.470001 1.405415 0.017791 56.505401 0.162093 53.493949 58.054357 57.544486 1.698163e+09 1.235467e+07 1.257479e+09 0.0 1.755337e+07 0.0 0.0 0.0 0.0 0.0 1.755337e+07 0.0 1.755337e+07 0.0 1.755337e+07 30.000000 1598.000000 1624.000000 0.0 1624.000000 0.0 1624.000000 1624.000000 0.148503 0.001484 0.017167 0.016877
75% 144.029999 146.320007 143.509995 145.369995 144.970901 1.261428e+08 1.319192 0.992497 7.672272 3.807343 148.292393 141.021576 145.084283 3.530257 2.976234 3.959999 52.376996 2.908638 2.802319 0.631128 6.750000 3.055451 0.055057 64.893640 0.244415 59.615310 78.321615 76.463326 1.834971e+09 4.936681e+07 1.498145e+09 0.0 2.805686e+07 0.0 0.0 0.0 0.0 0.0 2.805686e+07 0.0 2.805686e+07 0.0 2.805686e+07 42.000000 2039.000000 2078.000000 0.0 2078.000000 0.0 2078.000000 2078.000000 1.319192 0.013106 0.022270 0.022068
max 181.119995 182.130005 175.529999 179.449997 179.449997 3.743368e+08 10.468863 1.945962 59.485690 7.034749 183.443733 170.899386 175.817143 8.685160 6.198563 12.809998 88.606101 7.511480 6.895391 1.568174 22.639999 8.296348 0.226803 81.885208 0.698579 78.339314 99.641254 96.904729 2.443018e+09 2.149569e+08 2.278771e+09 0.0 3.066229e+08 0.0 0.0 0.0 0.0 0.0 3.066229e+08 0.0 3.066229e+08 0.0 3.066229e+08 236.000000 5378.000000 5587.000000 0.0 5587.000000 0.0 5587.000000 5587.000000 10.468863 0.099564 0.035331 0.035331
In [948]:
# RENAME THE Real_or_Fake_tweet COLUMN TO Fake_news
df = df.rename(columns={'Real_or_Fake_tweet': 'Fake_news'})
In [949]:
# FILL THE REMAINING GAPS WITH EACH NUMERIC COLUMN'S MEDIAN.
# numeric_only=True KEEPS THE Date COLUMN OUT OF THE MEDIAN SO THE RESULT DOES NOT
# DEPEND ON HOW THE INSTALLED PANDAS HANDLES NON-NUMERIC COLUMNS.
# NOTE(review): THE ONLY COLUMNS WITH GAPS AT THIS POINT ARE vol_current AND vol_future
# (ROLLING-WINDOW WARM-UP / LOOK-AHEAD ROWS); CONFIRM THAT MEDIAN-IMPUTING THEM IS
# INTENDED RATHER THAN DROPPING THOSE ROWS.
df = df.fillna(df.median(numeric_only=True))
In [950]:
df.isna().sum()
Out[950]:
Date                         0
Open                         0
High                         0
Low                          0
Close                        0
Adj Close                    0
Volume                       0
Return                       0
Beta                         0
Variance                     0
AvgTrueRange                 0
Upperband                    0
Lowerband                    0
Middleband                   0
APO                          0
NATR                         0
TRANGE                       0
DMI                          0
MACD                         0
MACDSIGNAL                   0
MACDHIST                     0
MOM                          0
PPO                          0
ROCP                         0
RSI                          0
TRIX                         0
ULTOSC                       0
SLOWK                        0
SLOWD                        0
AD                           0
ADOSC                        0
OBV                          0
Upward_momentum_created      0
Downward_momentum_created    0
B5_O_Um                      0
B5_C_Um                      0
B5_E_Um                      0
B5_A_Um                      0
B5_N_Um                      0
B5_O_Dm                      0
B5_C_Dm                      0
B5_E_Dm                      0
B5_A_Dm                      0
B5_N_Dm                      0
Verified_status_True         0
Verified_status_False        0
O                            0
C                            0
E                            0
A                            0
N                            0
Fake_news                    0
returns                      0
log_returns                  0
vol_current                  0
vol_future                   0
dtype: int64
In [951]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 373 entries, 68 to 440
Data columns (total 56 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Date                       373 non-null    datetime64[ns]
 1   Open                       373 non-null    float64       
 2   High                       373 non-null    float64       
 3   Low                        373 non-null    float64       
 4   Close                      373 non-null    float64       
 5   Adj Close                  373 non-null    float64       
 6   Volume                     373 non-null    int64         
 7   Return                     373 non-null    float64       
 8   Beta                       373 non-null    float64       
 9   Variance                   373 non-null    float64       
 10  AvgTrueRange               373 non-null    float64       
 11  Upperband                  373 non-null    float64       
 12  Lowerband                  373 non-null    float64       
 13  Middleband                 373 non-null    float64       
 14  APO                        373 non-null    float64       
 15  NATR                       373 non-null    float64       
 16  TRANGE                     373 non-null    float64       
 17  DMI                        373 non-null    float64       
 18  MACD                       373 non-null    float64       
 19  MACDSIGNAL                 373 non-null    float64       
 20  MACDHIST                   373 non-null    float64       
 21  MOM                        373 non-null    float64       
 22  PPO                        373 non-null    float64       
 23  ROCP                       373 non-null    float64       
 24  RSI                        373 non-null    float64       
 25  TRIX                       373 non-null    float64       
 26  ULTOSC                     373 non-null    float64       
 27  SLOWK                      373 non-null    float64       
 28  SLOWD                      373 non-null    float64       
 29  AD                         373 non-null    float64       
 30  ADOSC                      373 non-null    float64       
 31  OBV                        373 non-null    float64       
 32  Upward_momentum_created    373 non-null    float64       
 33  Downward_momentum_created  373 non-null    float64       
 34  B5_O_Um                    373 non-null    float64       
 35  B5_C_Um                    373 non-null    float64       
 36  B5_E_Um                    373 non-null    float64       
 37  B5_A_Um                    373 non-null    float64       
 38  B5_N_Um                    373 non-null    float64       
 39  B5_O_Dm                    373 non-null    float64       
 40  B5_C_Dm                    373 non-null    float64       
 41  B5_E_Dm                    373 non-null    float64       
 42  B5_A_Dm                    373 non-null    float64       
 43  B5_N_Dm                    373 non-null    float64       
 44  Verified_status_True       373 non-null    int64         
 45  Verified_status_False      373 non-null    int64         
 46  O                          373 non-null    int64         
 47  C                          373 non-null    int64         
 48  E                          373 non-null    int64         
 49  A                          373 non-null    int64         
 50  N                          373 non-null    int64         
 51  Fake_news                  373 non-null    int64         
 52  returns                    373 non-null    float64       
 53  log_returns                373 non-null    float64       
 54  vol_current                373 non-null    float64       
 55  vol_future                 373 non-null    float64       
dtypes: datetime64[ns](1), float64(46), int64(9)
memory usage: 166.1 KB
In [952]:
df.shape
Out[952]:
(373, 56)
In [953]:
# DROP ANY ROW THAT STILL CONTAINS A MISSING VALUE
# NOTE(review): THE isna().sum() OUTPUT ABOVE REPORTS 0 MISSING VALUES FOR EVERY COLUMN,
# SO THIS LOOKS LIKE A NO-OP IN THIS RUN - CONFIRM IT IS STILL NEEDED
df=df.dropna()
In [954]:
df.dtypes
Out[954]:
Date                         datetime64[ns]
Open                                float64
High                                float64
Low                                 float64
Close                               float64
Adj Close                           float64
Volume                                int64
Return                              float64
Beta                                float64
Variance                            float64
AvgTrueRange                        float64
Upperband                           float64
Lowerband                           float64
Middleband                          float64
APO                                 float64
NATR                                float64
TRANGE                              float64
DMI                                 float64
MACD                                float64
MACDSIGNAL                          float64
MACDHIST                            float64
MOM                                 float64
PPO                                 float64
ROCP                                float64
RSI                                 float64
TRIX                                float64
ULTOSC                              float64
SLOWK                               float64
SLOWD                               float64
AD                                  float64
ADOSC                               float64
OBV                                 float64
Upward_momentum_created             float64
Downward_momentum_created           float64
B5_O_Um                             float64
B5_C_Um                             float64
B5_E_Um                             float64
B5_A_Um                             float64
B5_N_Um                             float64
B5_O_Dm                             float64
B5_C_Dm                             float64
B5_E_Dm                             float64
B5_A_Dm                             float64
B5_N_Dm                             float64
Verified_status_True                  int64
Verified_status_False                 int64
O                                     int64
C                                     int64
E                                     int64
A                                     int64
N                                     int64
Fake_news                             int64
returns                             float64
log_returns                         float64
vol_current                         float64
vol_future                          float64
dtype: object
In [955]:
import matplotlib.pyplot as plt
import seaborn as sns
# Annotated heatmap of the full pairwise correlation matrix of df.
# The canvas is very wide because every cell carries a numeric annotation.
plt.figure(figsize=(40, 15))
sns.heatmap(data=df.corr(), annot=True)
Out[955]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f07831e0610>
In [956]:
# One histogram per column of df; the trailing semicolon suppresses the
# array-of-Axes repr that would otherwise be echoed as the cell output.
df.hist(
    bins=70,
    figsize=(20, 32),
    xlabelsize=8,
    ylabelsize=8,
);
In [957]:
# Columns whose correlation with AvgTrueRange exceeds 0.5 in magnitude,
# sorted descending. The count includes AvgTrueRange itself (r = 1.0).
df_corr = df.corr().loc[:, 'AvgTrueRange']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with AvgTrueRange:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 7 strongly correlated values with AvgTrueRange:
AvgTrueRange    1.000000
NATR            0.911441
vol_future      0.671767
TRANGE          0.661504
vol_current     0.660444
Variance        0.572390
Volume          0.565976
Name: AvgTrueRange, dtype: float64
In [958]:
# Columns whose correlation with NATR exceeds 0.5 in magnitude, sorted
# descending. The count includes NATR itself (r = 1.0).
df_corr = df.corr().loc[:, 'NATR']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with NATR :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 8 strongly correlated values with NATR :
NATR            1.000000
AvgTrueRange    0.911441
vol_current     0.811933
vol_future      0.803957
Volume          0.643564
TRANGE          0.560180
TRIX            0.558977
Variance        0.512613
Name: NATR, dtype: float64
In [959]:
# Columns whose correlation with TRANGE exceeds 0.5 in magnitude, sorted
# descending. The count includes TRANGE itself (r = 1.0).
df_corr = df.corr().loc[:, 'TRANGE']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with TRANGE:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 9 strongly correlated values with TRANGE:
TRANGE                   1.000000
Volume                   0.686354
AvgTrueRange             0.661504
Verified_status_False    0.619150
Fake_news                0.615505
N                        0.615505
E                        0.615505
O                        0.615505
NATR                     0.560180
Name: TRANGE, dtype: float64
In [960]:
# Columns whose correlation with the 'O' (Openness) column exceeds 0.5 in
# magnitude, sorted descending. The count includes 'O' itself.
df_corr = df.corr().loc[:, 'O']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Openness:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 12 strongly correlated values with Openness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999745
Verified_status_True         0.813119
Volume                       0.623153
TRANGE                       0.615505
B5_N_Dm                      0.544418
B5_E_Dm                      0.544418
B5_O_Dm                      0.544418
Downward_momentum_created    0.544418
Name: O, dtype: float64
In [961]:
# Columns whose correlation with the 'C' (conscientiousness) column exceeds
# 0.5 in magnitude, sorted descending.
# An empty result means no correlation passed the filter; NaN correlations
# (e.g. from a constant column) never satisfy the comparison.
df_corr = df.corr().loc[:, 'C']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with conscientiousness:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with conscientiousness:
Series([], Name: C, dtype: float64)
In [962]:
# Columns whose correlation with the 'E' column exceeds 0.5 in magnitude,
# sorted descending. The count includes 'E' itself.
# The message previously said "conscientiousness" (copied from the 'C' cell);
# 'E' is the Extraversion trait, so the label now matches the column analysed.
df_corr = df.corr()['E']
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
print("There are {} strongly correlated values with Extraversion:\n{}".format(len(golden_features_list), golden_features_list))
There are 12 strongly correlated values with conscientiousness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999745
Verified_status_True         0.813119
Volume                       0.623153
TRANGE                       0.615505
B5_N_Dm                      0.544418
B5_E_Dm                      0.544418
B5_O_Dm                      0.544418
Downward_momentum_created    0.544418
Name: E, dtype: float64
In [963]:
# Columns whose correlation with the 'A' column exceeds 0.5 in magnitude,
# sorted descending.
# The message previously said "conscientiousness" (copied from the 'C' cell);
# 'A' is the Agreeableness trait, so the label now matches the column analysed.
# An empty result means no correlation passed the filter; NaN correlations
# (e.g. from a constant column) never satisfy the comparison.
df_corr = df.corr()['A']
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
print("There are {} strongly correlated values with Agreeableness:\n{}".format(len(golden_features_list), golden_features_list))
There are 0 strongly correlated values with conscientiousness:
Series([], Name: A, dtype: float64)
In [964]:
# Columns whose correlation with the 'N' column exceeds 0.5 in magnitude,
# sorted descending. The count includes 'N' itself.
# The message previously said "conscientiousness" (copied from the 'C' cell);
# 'N' is the Neuroticism trait, so the label now matches the column analysed.
df_corr = df.corr()['N']
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
print("There are {} strongly correlated values with Neuroticism:\n{}".format(len(golden_features_list), golden_features_list))
There are 12 strongly correlated values with conscientiousness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999745
Verified_status_True         0.813119
Volume                       0.623153
TRANGE                       0.615505
B5_N_Dm                      0.544418
B5_E_Dm                      0.544418
B5_O_Dm                      0.544418
Downward_momentum_created    0.544418
Name: N, dtype: float64
In [965]:
# Echo the column labels of df as the cell output (reference for the
# per-column correlation cells that follow).
df.columns
Out[965]:
Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Return',
       'Beta', 'Variance', 'AvgTrueRange', 'Upperband', 'Lowerband',
       'Middleband', 'APO', 'NATR', 'TRANGE', 'DMI', 'MACD', 'MACDSIGNAL',
       'MACDHIST', 'MOM', 'PPO', 'ROCP', 'RSI', 'TRIX', 'ULTOSC', 'SLOWK',
       'SLOWD', 'AD', 'ADOSC', 'OBV', 'Upward_momentum_created',
       'Downward_momentum_created', 'B5_O_Um', 'B5_C_Um', 'B5_E_Um', 'B5_A_Um',
       'B5_N_Um', 'B5_O_Dm', 'B5_C_Dm', 'B5_E_Dm', 'B5_A_Dm', 'B5_N_Dm',
       'Verified_status_True', 'Verified_status_False', 'O', 'C', 'E', 'A',
       'N', 'Fake_news', 'returns', 'log_returns', 'vol_current',
       'vol_future'],
      dtype='object')
In [966]:
# Columns whose correlation with B5_O_Um exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_O_Um']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_O_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_O_Um:
Series([], Name: B5_O_Um, dtype: float64)
In [967]:
# Columns whose correlation with B5_C_Um exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_C_Um']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_C_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_C_Um:
Series([], Name: B5_C_Um, dtype: float64)
In [968]:
# Columns whose correlation with B5_E_Um exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_E_Um']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_E_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_E_Um:
Series([], Name: B5_E_Um, dtype: float64)
In [969]:
# Columns whose correlation with B5_A_Um exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_A_Um']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_A_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_A_Um:
Series([], Name: B5_A_Um, dtype: float64)
In [970]:
# Columns whose correlation with B5_N_Um exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_N_Um']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_N_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_N_Um:
Series([], Name: B5_N_Um, dtype: float64)

Downward momentum correlation

In [971]:
# Columns whose correlation with B5_O_Dm exceeds 0.5 in magnitude, sorted
# descending. The count includes B5_O_Dm itself (r = 1.0).
df_corr = df.corr().loc[:, 'B5_O_Dm']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_O_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_O_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.733470
Fake_news                    0.544418
N                            0.544418
E                            0.544418
O                            0.544418
Verified_status_False        0.532996
Name: B5_O_Dm, dtype: float64
In [972]:
# Columns whose correlation with B5_C_Dm exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_C_Dm']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_C_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_C_Dm:
Series([], Name: B5_C_Dm, dtype: float64)
In [973]:
# Columns whose correlation with B5_E_Dm exceeds 0.5 in magnitude, sorted
# descending. The count includes B5_E_Dm itself (r = 1.0).
df_corr = df.corr().loc[:, 'B5_E_Dm']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_E_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_E_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.733470
Fake_news                    0.544418
N                            0.544418
E                            0.544418
O                            0.544418
Verified_status_False        0.532996
Name: B5_E_Dm, dtype: float64
In [974]:
# Columns whose correlation with B5_A_Dm exceeds 0.5 in magnitude, sorted
# descending. NaN correlations (e.g. a constant column) never pass the filter.
df_corr = df.corr().loc[:, 'B5_A_Dm']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_A_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_A_Dm:
Series([], Name: B5_A_Dm, dtype: float64)
In [975]:
# Columns whose correlation with B5_N_Dm exceeds 0.5 in magnitude, sorted
# descending. The count includes B5_N_Dm itself (r = 1.0).
df_corr = df.corr().loc[:, 'B5_N_Dm']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_N_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_N_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.733470
Fake_news                    0.544418
N                            0.544418
E                            0.544418
O                            0.544418
Verified_status_False        0.532996
Name: B5_N_Dm, dtype: float64
In [976]:
# Columns whose correlation with Fake_news exceeds 0.5 in magnitude, sorted
# descending. The printed label uses "Real_or_Fake_tweet", which differs
# from the column name queried here.
df_corr = df.corr().loc[:, 'Fake_news']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Real_or_Fake_tweet :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 12 strongly correlated values with Real_or_Fake_tweet :
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999745
Verified_status_True         0.813119
Volume                       0.623153
TRANGE                       0.615505
B5_N_Dm                      0.544418
B5_E_Dm                      0.544418
B5_O_Dm                      0.544418
Downward_momentum_created    0.544418
Name: Fake_news, dtype: float64
In [977]:
# Columns whose correlation with Downward_momentum_created exceeds 0.5 in
# magnitude, sorted descending. The count includes the column itself.
df_corr = df.corr().loc[:, 'Downward_momentum_created']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Downward_momentum_created :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with Downward_momentum_created :
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.733470
Fake_news                    0.544418
N                            0.544418
E                            0.544418
O                            0.544418
Verified_status_False        0.532996
Name: Downward_momentum_created, dtype: float64
In [978]:
# Columns whose correlation with Upward_momentum_created exceeds 0.5 in
# magnitude, sorted descending. NaN correlations (e.g. a constant column)
# never pass the filter, which yields an empty result.
df_corr = df.corr().loc[:, 'Upward_momentum_created']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Upward_momentum_created :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with Upward_momentum_created :
Series([], Name: Upward_momentum_created, dtype: float64)
In [979]:
# Columns whose correlation with Verified_status_True exceeds 0.5 in
# magnitude, sorted descending. The count includes the column itself.
df_corr = df.corr().loc[:, 'Verified_status_True']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Verified_status_True :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with Verified_status_True :
Verified_status_True         1.000000
Fake_news                    0.813119
N                            0.813119
E                            0.813119
O                            0.813119
Verified_status_False        0.799764
B5_N_Dm                      0.733470
B5_E_Dm                      0.733470
B5_O_Dm                      0.733470
Downward_momentum_created    0.733470
Name: Verified_status_True, dtype: float64
In [980]:
# Columns whose correlation with Verified_status_False exceeds 0.5 in
# magnitude, sorted descending. The count includes the column itself.
df_corr = df.corr().loc[:, 'Verified_status_False']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Verified_status_False :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 12 strongly correlated values with Verified_status_False :
Verified_status_False        1.000000
Fake_news                    0.999745
N                            0.999745
E                            0.999745
O                            0.999745
Verified_status_True         0.799764
Volume                       0.624038
TRANGE                       0.619150
B5_N_Dm                      0.532996
B5_E_Dm                      0.532996
B5_O_Dm                      0.532996
Downward_momentum_created    0.532996
Name: Verified_status_False, dtype: float64
In [981]:
# Apply seaborn's theme with fonts scaled to 0.8 for the plots that follow.
sns.set(font_scale=0.8)
In [982]:
# Scatter NATR (y) against every column of df, five columns per figure so
# each row of panels stays readable.
for start in range(0, len(df.columns), 5):
    sns.pairplot(
        data=df,
        y_vars=['NATR'],
        x_vars=df.columns[start:start + 5],
    )
In [983]:
# Echo each column's dtype as the cell output.
df.dtypes
Out[983]:
Date                         datetime64[ns]
Open                                float64
High                                float64
Low                                 float64
Close                               float64
Adj Close                           float64
Volume                                int64
Return                              float64
Beta                                float64
Variance                            float64
AvgTrueRange                        float64
Upperband                           float64
Lowerband                           float64
Middleband                          float64
APO                                 float64
NATR                                float64
TRANGE                              float64
DMI                                 float64
MACD                                float64
MACDSIGNAL                          float64
MACDHIST                            float64
MOM                                 float64
PPO                                 float64
ROCP                                float64
RSI                                 float64
TRIX                                float64
ULTOSC                              float64
SLOWK                               float64
SLOWD                               float64
AD                                  float64
ADOSC                               float64
OBV                                 float64
Upward_momentum_created             float64
Downward_momentum_created           float64
B5_O_Um                             float64
B5_C_Um                             float64
B5_E_Um                             float64
B5_A_Um                             float64
B5_N_Um                             float64
B5_O_Dm                             float64
B5_C_Dm                             float64
B5_E_Dm                             float64
B5_A_Dm                             float64
B5_N_Dm                             float64
Verified_status_True                  int64
Verified_status_False                 int64
O                                     int64
C                                     int64
E                                     int64
A                                     int64
N                                     int64
Fake_news                             int64
returns                             float64
log_returns                         float64
vol_current                         float64
vol_future                          float64
dtype: object
In [984]:
# Count missing values per column; all-zero output means nothing to impute.
df.isnull().sum()
Out[984]:
Date                         0
Open                         0
High                         0
Low                          0
Close                        0
Adj Close                    0
Volume                       0
Return                       0
Beta                         0
Variance                     0
AvgTrueRange                 0
Upperband                    0
Lowerband                    0
Middleband                   0
APO                          0
NATR                         0
TRANGE                       0
DMI                          0
MACD                         0
MACDSIGNAL                   0
MACDHIST                     0
MOM                          0
PPO                          0
ROCP                         0
RSI                          0
TRIX                         0
ULTOSC                       0
SLOWK                        0
SLOWD                        0
AD                           0
ADOSC                        0
OBV                          0
Upward_momentum_created      0
Downward_momentum_created    0
B5_O_Um                      0
B5_C_Um                      0
B5_E_Um                      0
B5_A_Um                      0
B5_N_Um                      0
B5_O_Dm                      0
B5_C_Dm                      0
B5_E_Dm                      0
B5_A_Dm                      0
B5_N_Dm                      0
Verified_status_True         0
Verified_status_False        0
O                            0
C                            0
E                            0
A                            0
N                            0
Fake_news                    0
returns                      0
log_returns                  0
vol_current                  0
vol_future                   0
dtype: int64
In [985]:
# Replace any remaining NaN with 0, mutating df in place.
# NOTE(review): the null count in the preceding cell is zero for every
# column, so this is a no-op for the run recorded here.
df.fillna(0, inplace = True)
In [986]:
# Drop rows containing NaN, mutating df in place.
# NOTE(review): redundant when run after the fillna(0) cell above, which
# leaves no NaN behind.
df.dropna(inplace=True)
In [987]:
# Apply seaborn's theme with fonts scaled to 0.8 for the plots that follow.
sns.set(font_scale=0.8)
In [988]:
# Correlation matrix of everything except 'Close', showing only the strong
# cells: r >= 0.5 or r <= -0.4 (note the asymmetric cut-offs). Cells outside
# those bands become NaN and are left blank by the heatmap.
corr = df.drop(columns='Close').corr()
plt.figure(figsize=(12, 10))

sns.heatmap(
    corr.where((corr >= 0.5) | (corr <= -0.4)),
    vmin=-1.0, vmax=1.0,
    cmap='YlGnBu',
    linewidths=0.1,
    square=True,
    annot=True,
    annot_kws={"size": 8},
);
In [989]:
# Summary statistics (count/mean/std/quantiles) per numeric column.
df.describe()
Out[989]:
Open High Low Close Adj Close Volume Return Beta Variance AvgTrueRange Upperband Lowerband Middleband APO NATR TRANGE DMI MACD MACDSIGNAL MACDHIST MOM PPO ROCP RSI TRIX ULTOSC SLOWK SLOWD AD ADOSC OBV Upward_momentum_created Downward_momentum_created B5_O_Um B5_C_Um B5_E_Um B5_A_Um B5_N_Um B5_O_Dm B5_C_Dm B5_E_Dm B5_A_Dm B5_N_Dm Verified_status_True Verified_status_False O C E A N Fake_news returns log_returns vol_current vol_future
count 373.000000 373.000000 373.000000 373.000000 373.000000 3.730000e+02 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000 3.730000e+02 3.730000e+02 3.730000e+02 373.0 3.730000e+02 373.0 373.0 373.0 373.0 373.0 3.730000e+02 373.0 3.730000e+02 373.0 3.730000e+02 373.000000 373.000000 373.000000 373.0 373.000000 373.0 373.000000 373.000000 373.000000 373.000000 373.000000 373.000000
mean 130.791998 132.289451 129.274222 130.838921 130.280209 1.091079e+08 0.196225 0.763967 6.495903 3.258898 134.624488 125.696830 130.160659 1.507587 2.541936 3.312500 36.309697 1.481322 1.454243 0.027079 2.279759 1.251722 0.019799 56.713070 0.193735 53.693181 56.931604 56.983372 1.668262e+09 1.222484e+07 1.163158e+09 0.0 2.962218e+07 0.0 0.0 0.0 0.0 0.0 2.962218e+07 0.0 2.962218e+07 0.0 2.962218e+07 39.337802 1879.943700 1919.281501 0.0 1919.281501 0.0 1919.281501 1919.281501 0.196225 0.001771 0.018585 0.018385
std 17.309654 17.431134 17.291613 17.376804 17.625371 4.952592e+07 1.953292 0.399802 8.384719 0.976497 17.292387 17.307655 17.123570 3.439116 0.874476 1.787296 22.032415 2.282406 2.071192 0.779085 7.087490 2.797648 0.057755 11.400372 0.194147 9.237370 24.247599 22.261619 2.990677e+08 6.505526e+07 5.533949e+08 0.0 3.420458e+07 0.0 0.0 0.0 0.0 0.0 3.420458e+07 0.0 3.420458e+07 0.0 3.420458e+07 31.336769 807.629326 832.903826 0.0 832.903826 0.0 832.903826 832.903826 1.953292 0.019495 0.006233 0.006148
min 88.312500 90.542503 87.820000 90.445000 89.574516 4.100000e+07 -8.006086 -0.483506 0.100885 1.862693 92.352753 85.935328 89.836785 -6.866250 1.356956 0.862495 0.140086 -3.539473 -2.963971 -2.510746 -22.049995 -5.812268 -0.164331 28.279096 -0.097785 30.909286 5.175548 11.363815 8.367979e+08 -1.871199e+08 -6.199268e+08 0.0 8.247484e+06 0.0 0.0 0.0 0.0 0.0 8.247484e+06 0.0 8.247484e+06 0.0 8.247484e+06 10.000000 797.000000 807.000000 0.0 807.000000 0.0 807.000000 807.000000 -8.006086 -0.083448 0.010033 0.010033
25% 119.900002 121.169998 118.150002 119.900002 119.343437 7.583400e+07 -0.777838 0.511713 1.881669 2.595455 123.673900 114.227710 119.188571 -0.445129 1.946684 2.029999 18.021302 -0.166436 -0.099720 -0.500311 -2.319992 -0.359148 -0.016674 48.301657 0.042848 47.230665 36.484828 38.489746 1.511316e+09 -3.231427e+07 9.090397e+08 0.0 1.396407e+07 0.0 0.0 0.0 0.0 0.0 1.396407e+07 0.0 1.396407e+07 0.0 1.396407e+07 23.000000 1385.000000 1408.000000 0.0 1408.000000 0.0 1408.000000 1408.000000 -0.777838 -0.007809 0.013410 0.013410
50% 128.960007 130.710007 127.860001 129.639999 129.107910 9.684900e+07 0.148503 0.755548 3.865648 3.008100 134.071972 124.362518 129.228572 1.767500 2.360681 2.900009 34.839791 1.554963 1.556372 0.117268 2.470001 1.405415 0.017791 56.505401 0.162093 53.493949 58.054357 57.544486 1.698163e+09 1.235467e+07 1.257479e+09 0.0 1.755337e+07 0.0 0.0 0.0 0.0 0.0 1.755337e+07 0.0 1.755337e+07 0.0 1.755337e+07 30.000000 1598.000000 1624.000000 0.0 1624.000000 0.0 1624.000000 1624.000000 0.148503 0.001484 0.017167 0.016877
75% 144.029999 146.320007 143.509995 145.369995 144.970901 1.261428e+08 1.319192 0.992497 7.672272 3.807343 148.292393 141.021576 145.084283 3.530257 2.976234 3.959999 52.376996 2.908638 2.802319 0.631128 6.750000 3.055451 0.055057 64.893640 0.244415 59.615310 78.321615 76.463326 1.834971e+09 4.936681e+07 1.498145e+09 0.0 2.805686e+07 0.0 0.0 0.0 0.0 0.0 2.805686e+07 0.0 2.805686e+07 0.0 2.805686e+07 42.000000 2039.000000 2078.000000 0.0 2078.000000 0.0 2078.000000 2078.000000 1.319192 0.013106 0.022062 0.021889
max 181.119995 182.130005 175.529999 179.449997 179.449997 3.743368e+08 10.468863 1.945962 59.485690 7.034749 183.443733 170.899386 175.817143 8.685160 6.198563 12.809998 88.606101 7.511480 6.895391 1.568174 22.639999 8.296348 0.226803 81.885208 0.698579 78.339314 99.641254 96.904729 2.443018e+09 2.149569e+08 2.278771e+09 0.0 3.066229e+08 0.0 0.0 0.0 0.0 0.0 3.066229e+08 0.0 3.066229e+08 0.0 3.066229e+08 236.000000 5378.000000 5587.000000 0.0 5587.000000 0.0 5587.000000 5587.000000 10.468863 0.099564 0.035331 0.035331
In [990]:
# Drop every row that still contains a NaN, mutating df in place.
# NOTE(review): the earlier fillna(0)/dropna cells already do this, so it
# looks redundant here - confirm before removing.
df.dropna(inplace=True)
In [991]:
n_zoom = 365
sns.set_context("talk", font_scale=1.3)

# Realized current volatility against the forward-looking target:
# the whole series in the upper panel, the trailing n_zoom rows in the lower.
# NOTE(review): n_future and INTERVAL_WINDOW are not set in this cell and
# must already exist in the kernel from an earlier cell.
with sns.axes_style("whitegrid"):
    fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(18,14))

    # (axes, row selection, line width) per panel
    for panel, rows, width in ((ax1, slice(None), 1),
                               (ax2, slice(-n_zoom, None), 2)):
        panel.plot(df.vol_current[rows], alpha=.8, lw=width, color='gray', ls=':',
                   label='Current Volatility')
        panel.plot(df.vol_future[rows], lw=width, color='blue',
                   label=f'Next {n_future} Days Volatility (TARGET)')

    ax1.title.set_text(f'Future vs. Current Daily Volatility \n Using {INTERVAL_WINDOW}-Day Interval')
    ax2.title.set_text(f'Zooming in the Last {n_zoom} Days')

    for panel in (ax1, ax2):
        panel.legend(loc='upper left', prop={'size': 13}, frameon=True)

    plt.tight_layout()
    plt.show()

Daily Volatility Distribution

In [992]:
# Density-normalised histogram of current volatility with a fitted normal
# curve overlaid for comparison.
with sns.axes_style("darkgrid"):
    fig, ax = plt.subplots(figsize=(10,6))
    sns.distplot(
        df.vol_current,
        bins=50,
        norm_hist=True,
        fit=stats.norm,
        ax=ax,
    )
    ax.set_title('Daily Volatility Distribution')

    plt.show()

Experiment 2: weekly granularity

In [993]:
# Ticker picker for the weekly experiment. 'SELECT' is the placeholder
# shown before the user chooses a symbol.
w = widgets.Dropdown(
    description ='Stock name:',
    value='SELECT',
    options=['SELECT','AAPL', 'ABUS', 'ARDS', 'BABA','BFRI',
             'FB', 'GME', 'MCD','PFE', 'PLUG',
             'QCOM', 'SENS','TSLA', 'TWTR', 'UUUU'],
)


def on_change(change):
    """Print the newly selected ticker whenever the dropdown value changes.

    The observer is registered for every trait notification, so anything
    that is not a change of the 'value' trait is ignored.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    print("You have selected %s" % change['new'])


w.observe(on_change)

display(w)
You have selected AAPL
In [998]:
# Load the feature file for whichever ticker is selected in the dropdown `w`.
# Every ticker follows the same /content/Final_<TICKER>.csv naming, so the
# path is built from the selection instead of one hand-written branch per
# symbol.
selected_ticker = w.value
if selected_ticker == 'SELECT':
    # Placeholder entry: previously no branch matched and `df` silently kept
    # whatever frame an earlier cell had left in the kernel.
    raise ValueError("No stock selected - choose a ticker in the dropdown before loading data.")

# 'Date' is parsed as datetime and used as the index.
df = pd.read_csv(f'/content/Final_{selected_ticker}.csv', parse_dates=['Date'], index_col=['Date'])
In [999]:
# Echo the column labels of the freshly loaded frame as the cell output.
df.columns
Out[999]:
Index(['Unnamed: 0', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'Return', 'Beta', 'Variance', 'AvgTrueRange', 'Upperband', 'Lowerband',
       'Middleband', 'APO', 'NATR', 'TRANGE', 'DMI', 'MACD', 'MACDSIGNAL',
       'MACDHIST', 'MOM', 'PPO', 'ROCP', 'RSI', 'TRIX', 'ULTOSC', 'SLOWK',
       'SLOWD', 'AD', 'ADOSC', 'OBV', 'Upward_momentum_created',
       'Downward_momentum_created', 'B5_O_Um', 'B5_C_Um', 'B5_E_Um', 'B5_A_Um',
       'B5_N_Um', 'B5_O_Dm', 'B5_C_Dm', 'B5_E_Dm', 'B5_A_Dm', 'B5_N_Dm',
       'Verified_status_True', 'Verified_status_False', 'O', 'C', 'E', 'A',
       'N', 'Real_or_Fake_tweet'],
      dtype='object')
In [1000]:
# (rows, columns) of the freshly loaded frame.
df.shape
Out[1000]:
(441, 52)
In [1001]:
# Count missing values per column to see which columns need imputing.
df.isnull().sum()
Out[1001]:
Unnamed: 0                    0
Open                          0
High                          0
Low                           0
Close                         0
Adj Close                     0
Volume                        0
Return                        0
Beta                          0
Variance                      0
AvgTrueRange                  0
Upperband                     0
Lowerband                     0
Middleband                    0
APO                           5
NATR                          0
TRANGE                        0
DMI                           0
MACD                         13
MACDSIGNAL                   13
MACDHIST                     13
MOM                           0
PPO                           5
ROCP                          0
RSI                           0
TRIX                         68
ULTOSC                        8
SLOWK                         0
SLOWD                         0
AD                            0
ADOSC                         0
OBV                           0
Upward_momentum_created       0
Downward_momentum_created     0
B5_O_Um                       0
B5_C_Um                       0
B5_E_Um                       0
B5_A_Um                       0
B5_N_Um                       0
B5_O_Dm                       0
B5_C_Dm                       0
B5_E_Dm                       0
B5_A_Dm                       0
B5_N_Dm                       0
Verified_status_True          0
Verified_status_False         0
O                             0
C                             0
E                             0
A                             0
N                             0
Real_or_Fake_tweet            0
dtype: int64
In [1002]:
# Clean the loaded frame in one pass:
#   1. fill each column's NaN with that column's median,
#   2. discard the 'Unnamed: 0' column,
#   3. rename 'Real_or_Fake_tweet' to the shorter 'Fake_news'.
# Not re-runnable as-is: a second run fails because 'Unnamed: 0' is gone.
df = (
    df.fillna(df.median())
      .drop(columns='Unnamed: 0')
      .rename(columns={'Real_or_Fake_tweet': 'Fake_news'})
)
In [1003]:
# Collapse the date-indexed rows into weekly bins ('W'), taking the mean of
# every column within each week.
df_weekly = df.resample('W').mean()
In [1004]:
# (weeks, columns) after the weekly resample.
df_weekly.shape
Out[1004]:
(91, 51)
In [1005]:
# Annotated heatmap of the full pairwise correlation matrix of the weekly
# frame; wide canvas because every cell carries a numeric annotation.
plt.figure(figsize=(40, 15))
sns.heatmap(data=df_weekly.corr(), annot=True)
Out[1005]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0781abb550>
In [1006]:
# Apply seaborn's theme with fonts scaled to 0.8 for the plots that follow.
sns.set(font_scale=0.8)
In [1007]:
# One histogram per column of the weekly frame; the trailing semicolon
# suppresses the array-of-Axes repr.
df_weekly.hist(
    bins=50,
    figsize=(20, 32),
    xlabelsize=8,
    ylabelsize=8,
);
In [1008]:
# Weekly data: columns whose correlation with AvgTrueRange exceeds 0.5 in
# magnitude, sorted descending. The count includes AvgTrueRange itself.
df_corr = df_weekly.corr().loc[:, 'AvgTrueRange']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with AvgTrueRange:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 9 strongly correlated values with AvgTrueRange:
AvgTrueRange             1.000000
TRANGE                   0.795245
NATR                     0.714859
Variance                 0.632613
Verified_status_False    0.508551
Fake_news                0.505670
N                        0.505670
E                        0.505670
O                        0.505670
Name: AvgTrueRange, dtype: float64
In [1009]:
# Weekly data: columns whose correlation with NATR exceeds 0.5 in magnitude
# (positive or negative), sorted descending so negative correlations come last.
df_corr = df_weekly.corr().loc[:, 'NATR']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with NATR :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with NATR :
NATR            1.000000
Volume          0.768216
AvgTrueRange    0.714859
Middleband     -0.503830
High           -0.506781
Open           -0.514168
Close          -0.522235
Adj Close      -0.523514
Low            -0.531570
Lowerband      -0.546996
Name: NATR, dtype: float64
In [1010]:
# Weekly data: columns whose correlation with TRANGE exceeds 0.5 in
# magnitude, sorted descending. The count includes TRANGE itself.
df_corr = df_weekly.corr().loc[:, 'TRANGE']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with TRANGE:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 8 strongly correlated values with TRANGE:
TRANGE                   1.000000
AvgTrueRange             0.795245
Verified_status_False    0.701652
Fake_news                0.699988
N                        0.699988
E                        0.699988
O                        0.699988
Variance                 0.578818
Name: TRANGE, dtype: float64
In [1011]:
# Weekly data: columns whose correlation with the 'O' (Openness) column
# exceeds 0.5 in magnitude, sorted descending. The count includes 'O' itself.
df_corr = df_weekly.corr().loc[:, 'O']
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Openness:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 15 strongly correlated values with Openness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999792
Verified_status_True         0.788912
TRANGE                       0.699988
B5_N_Dm                      0.633404
B5_E_Dm                      0.633404
B5_O_Dm                      0.633404
Downward_momentum_created    0.633404
AD                           0.619411
OBV                          0.553680
AvgTrueRange                 0.505670
Upperband                    0.502106
Name: O, dtype: float64
In [1012]:
# Pairwise correlations of every weekly column against C (conscientiousness).
# NOTE(review): the recorded output is empty - not even C itself passes the
# filter - which suggests its correlations are NaN (e.g. a constant column);
# confirm.
df_corr = df_weekly.corr().loc[:, 'C']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with conscientiousness:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with conscientiousness:
Series([], Name: C, dtype: float64)
In [1013]:
# Pairwise correlations of every weekly column against E (extraversion).
df_corr = df_weekly.corr()['E']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
# The message previously said "conscientiousness" (copied from the C cell);
# it now names the trait that column E actually holds.
print("There are {} strongly correlated values with extraversion:\n{}".format(len(golden_features_list), golden_features_list))
There are 15 strongly correlated values with conscientiousness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999792
Verified_status_True         0.788912
TRANGE                       0.699988
B5_N_Dm                      0.633404
B5_E_Dm                      0.633404
B5_O_Dm                      0.633404
Downward_momentum_created    0.633404
AD                           0.619411
OBV                          0.553680
AvgTrueRange                 0.505670
Upperband                    0.502106
Name: E, dtype: float64
In [1014]:
# Pairwise correlations of every weekly column against A (agreeableness).
# NOTE(review): the recorded output is empty - not even A itself passes the
# filter - which suggests its correlations are NaN (e.g. a constant column);
# confirm.
df_corr = df_weekly.corr()['A']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
# The message previously said "conscientiousness" (copied from the C cell);
# it now names the trait that column A actually holds.
print("There are {} strongly correlated values with agreeableness:\n{}".format(len(golden_features_list), golden_features_list))
There are 0 strongly correlated values with conscientiousness:
Series([], Name: A, dtype: float64)
In [1015]:
# Pairwise correlations of every weekly column against N (neuroticism).
df_corr = df_weekly.corr()['N']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr[abs(df_corr) > 0.5].sort_values(ascending=False)
# The message previously said "conscientiousness" (copied from the C cell);
# it now names the trait that column N actually holds.
print("There are {} strongly correlated values with neuroticism:\n{}".format(len(golden_features_list), golden_features_list))
There are 15 strongly correlated values with conscientiousness:
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999792
Verified_status_True         0.788912
TRANGE                       0.699988
B5_N_Dm                      0.633404
B5_E_Dm                      0.633404
B5_O_Dm                      0.633404
Downward_momentum_created    0.633404
AD                           0.619411
OBV                          0.553680
AvgTrueRange                 0.505670
Upperband                    0.502106
Name: N, dtype: float64
In [1016]:
# Pairwise correlations of every weekly column against B5_O_Um.
df_corr = df_weekly.corr().loc[:, 'B5_O_Um']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_O_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_O_Um:
Series([], Name: B5_O_Um, dtype: float64)
In [1017]:
# Pairwise correlations of every weekly column against B5_C_Um.
df_corr = df_weekly.corr().loc[:, 'B5_C_Um']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_C_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_C_Um:
Series([], Name: B5_C_Um, dtype: float64)
In [1018]:
# Pairwise correlations of every weekly column against B5_E_Um.
df_corr = df_weekly.corr().loc[:, 'B5_E_Um']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_E_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_E_Um:
Series([], Name: B5_E_Um, dtype: float64)
In [1019]:
# Pairwise correlations of every weekly column against B5_A_Um.
df_corr = df_weekly.corr().loc[:, 'B5_A_Um']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_A_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_A_Um:
Series([], Name: B5_A_Um, dtype: float64)
In [1020]:
# Pairwise correlations of every weekly column against B5_N_Um.
df_corr = df_weekly.corr().loc[:, 'B5_N_Um']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_N_Um:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_N_Um:
Series([], Name: B5_N_Um, dtype: float64)

Downward momentum correlation

In [1021]:
# Pairwise correlations of every weekly column against B5_O_Dm.
df_corr = df_weekly.corr().loc[:, 'B5_O_Dm']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_O_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_O_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.849395
Fake_news                    0.633404
N                            0.633404
E                            0.633404
O                            0.633404
Verified_status_False        0.621656
Name: B5_O_Dm, dtype: float64
In [1022]:
# Pairwise correlations of every weekly column against B5_C_Dm.
df_corr = df_weekly.corr().loc[:, 'B5_C_Dm']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_C_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_C_Dm:
Series([], Name: B5_C_Dm, dtype: float64)
In [1023]:
# Pairwise correlations of every weekly column against B5_E_Dm.
df_corr = df_weekly.corr().loc[:, 'B5_E_Dm']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_E_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_E_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.849395
Fake_news                    0.633404
N                            0.633404
E                            0.633404
O                            0.633404
Verified_status_False        0.621656
Name: B5_E_Dm, dtype: float64
In [1024]:
# Pairwise correlations of every weekly column against B5_A_Dm.
df_corr = df_weekly.corr().loc[:, 'B5_A_Dm']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_A_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with B5_A_Dm:
Series([], Name: B5_A_Dm, dtype: float64)
In [1025]:
# Pairwise correlations of every weekly column against B5_N_Dm.
df_corr = df_weekly.corr().loc[:, 'B5_N_Dm']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with B5_N_Dm:\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with B5_N_Dm:
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.849395
Fake_news                    0.633404
N                            0.633404
E                            0.633404
O                            0.633404
Verified_status_False        0.621656
Name: B5_N_Dm, dtype: float64
In [1026]:
# Pairwise correlations of every weekly column against Fake_news
# (reported under the label "Real_or_Fake_tweet").
df_corr = df_weekly.corr().loc[:, 'Fake_news']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Real_or_Fake_tweet :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 15 strongly correlated values with Real_or_Fake_tweet :
Fake_news                    1.000000
N                            1.000000
E                            1.000000
O                            1.000000
Verified_status_False        0.999792
Verified_status_True         0.788912
TRANGE                       0.699988
B5_N_Dm                      0.633404
B5_E_Dm                      0.633404
B5_O_Dm                      0.633404
Downward_momentum_created    0.633404
AD                           0.619411
OBV                          0.553680
AvgTrueRange                 0.505670
Upperband                    0.502106
Name: Fake_news, dtype: float64
In [1027]:
# Pairwise correlations of every weekly column against Downward_momentum_created.
df_corr = df_weekly.corr().loc[:, 'Downward_momentum_created']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Downward_momentum_created :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with Downward_momentum_created :
B5_N_Dm                      1.000000
B5_E_Dm                      1.000000
B5_O_Dm                      1.000000
Downward_momentum_created    1.000000
Verified_status_True         0.849395
Fake_news                    0.633404
N                            0.633404
E                            0.633404
O                            0.633404
Verified_status_False        0.621656
Name: Downward_momentum_created, dtype: float64
In [1028]:
# Pairwise correlations of every weekly column against Upward_momentum_created.
df_corr = df_weekly.corr().loc[:, 'Upward_momentum_created']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Upward_momentum_created :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 0 strongly correlated values with Upward_momentum_created :
Series([], Name: Upward_momentum_created, dtype: float64)
In [1029]:
# Pairwise correlations of every weekly column against Verified_status_True.
df_corr = df_weekly.corr().loc[:, 'Verified_status_True']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Verified_status_True :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 10 strongly correlated values with Verified_status_True :
Verified_status_True         1.000000
B5_N_Dm                      0.849395
B5_E_Dm                      0.849395
B5_O_Dm                      0.849395
Downward_momentum_created    0.849395
Fake_news                    0.788912
N                            0.788912
E                            0.788912
O                            0.788912
Verified_status_False        0.776205
Name: Verified_status_True, dtype: float64
In [1030]:
# Pairwise correlations of every weekly column against Verified_status_False.
df_corr = df_weekly.corr().loc[:, 'Verified_status_False']
# Keep only the columns whose absolute correlation exceeds 0.5, largest first.
golden_features_list = df_corr.loc[df_corr.abs() > 0.5].sort_values(ascending=False)
print(
    "There are {} strongly correlated values with Verified_status_False :\n{}".format(
        len(golden_features_list), golden_features_list
    )
)
There are 16 strongly correlated values with Verified_status_False :
Verified_status_False        1.000000
Fake_news                    0.999792
N                            0.999792
E                            0.999792
O                            0.999792
Verified_status_True         0.776205
TRANGE                       0.701652
AD                           0.624406
B5_N_Dm                      0.621656
B5_E_Dm                      0.621656
B5_O_Dm                      0.621656
Downward_momentum_created    0.621656
OBV                          0.558693
AvgTrueRange                 0.508551
Upperband                    0.507719
High                         0.501437
Name: Verified_status_False, dtype: float64
In [1031]:
# Apply seaborn's global style with fonts scaled to 0.8 for the plots that follow.
sns.set(font_scale=0.8)
In [1032]:
# Plot NATR against every column of the weekly frame, taking the columns five
# at a time so each pairplot call handles a small batch of x-variables.
weekly_columns = df_weekly.columns
for i in range(0, len(weekly_columns), 5):
    x_batch = weekly_columns[i:i + 5]
    sns.pairplot(df_weekly, x_vars=x_batch, y_vars=['NATR'])
In [1033]:
# Overwrite every missing value in the weekly frame with 0, mutating it in place.
df_weekly.fillna(value=0, inplace=True)
In [1034]:
# Drop any row that still contains a missing value, mutating the frame in place.
# NOTE(review): the preceding cell already replaces missing values with 0, so
# this probably removes nothing - confirm whether it is still needed.
df_weekly.dropna(axis=0, how='any', inplace=True)
In [1035]:
# Correlation matrix of the weekly features with the 'Close' column left out.
corr = df_weekly.drop(columns='Close').corr()
plt.figure(figsize=(12, 10))

# Cells that are neither >= 0.5 nor <= -0.4 become NaN, so only the stronger
# relationships are coloured and annotated on the heatmap.
sns.heatmap(
    corr.where((corr >= 0.5) | (corr <= -0.4)),
    vmin=-1.0, vmax=1.0,
    cmap='YlGnBu',
    linewidths=0.1,
    square=True,
    annot=True, annot_kws={"size": 8},
);

Weekly volatility distribution

In [1036]:
# Histogram of weekly NATR (50 bins, normalised to a density) with a fitted
# normal curve overlaid, drawn with the "darkgrid" style for this figure only.
with sns.axes_style("darkgrid"):
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.distplot(
        df_weekly['NATR'],
        bins=50,
        norm_hist=True,
        fit=stats.norm,
        ax=ax,
    )
    ax.set_title('Weekly Volatility Distribution')

    plt.show();